Relative to the reference region (the intercept), game duration is highly significantly longer in regions 9 and 20 and highly significantly shorter in regions 3 and 5 (p < 0.001). It is also significantly longer in regions 10 and 38 and significantly shorter in regions 8 and 18 (p < 0.05).
# Treat region as categorical so lm() estimates one offset per region level.
df[["region"]] <- factor(df[["region"]])
# One-way model: game duration explained by region only.
model_region <- lm(
  formula = duration ~ region,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_region)
##
## Call:
## lm(formula = duration ~ region, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1795.7 -332.3 -76.3 245.7 4717.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2157.19 29.57 72.953 < 2e-16 ***
## region3 -252.86 30.25 -8.358 < 2e-16 ***
## region5 -137.49 35.25 -3.901 9.66e-05 ***
## region6 67.58 49.39 1.369 0.17119
## region7 216.06 132.65 1.629 0.10339
## region8 -77.00 32.38 -2.378 0.01745 *
## region9 265.95 68.14 3.903 9.56e-05 ***
## region10 520.56 260.31 2.000 0.04555 *
## region13 26.06 57.12 0.456 0.64829
## region14 402.81 518.10 0.777 0.43690
## region15 -59.34 47.18 -1.258 0.20849
## region17 -64.23 109.65 -0.586 0.55800
## region18 -208.42 87.95 -2.370 0.01781 *
## region19 -62.35 105.66 -0.590 0.55517
## region20 1227.56 260.31 4.716 2.44e-06 ***
## region25 -74.44 102.13 -0.729 0.46606
## region38 127.11 46.42 2.739 0.00618 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 517.3 on 9983 degrees of freedom
## Multiple R-squared: 0.04567, Adjusted R-squared: 0.04414
## F-statistic: 29.86 on 16 and 9983 DF, p-value: < 2.2e-16
# Duration by region. Because region is a factor, plot() draws one box plot
# per level rather than a scatterplot.
with(df, plot(
  x = region, y = duration,
  main = "Duration ~ Region",
  xlab = "Region", ylab = "Duration",
  col = "steelblue", pch = 16
))
# No abline() here: a single fitted line is not meaningful across factor levels.
# Standard lm diagnostic plots for the region model.
plot(model_region)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
For each additional 1 s delay in first blood, the game’s total duration is predicted to be 0.114 s shorter on average. Statistically, there is a tiny negative slope: later first blood is associated with marginally shorter games. Practically, that relationship is negligible; first blood time accounts for virtually none of the variability in game duration (R² ≈ 0.0006).
# Simple linear regression of game duration on first-blood time.
model_first_blood <- lm(
  formula = duration ~ first_blood_time,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_first_blood)
##
## Call:
## lm(formula = duration ~ first_blood_time, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1121.0 -354.5 -96.4 256.2 4753.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1991.7149 8.5456 233.071 <2e-16 ***
## first_blood_time -0.1144 0.0455 -2.515 0.0119 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 528.9 on 9998 degrees of freedom
## Multiple R-squared: 0.0006324, Adjusted R-squared: 0.0005324
## F-statistic: 6.327 on 1 and 9998 DF, p-value: 0.01191
# Scatterplot of duration against first-blood time.
with(df, plot(
  x = first_blood_time, y = duration,
  main = "Duration ~ First Blood Time",
  xlab = "First Blood Time", ylab = "Duration",
  col = "steelblue", pch = 16
))
# Overlay the fitted regression line.
abline(reg = model_first_blood, col = "firebrick", lwd = 2)
# Standard lm diagnostic plots (residuals, Q-Q, scale-location, leverage).
plot(model_first_blood)
There is a positive slope. For each one unit increase in dire score, the game duration increases by 14.4s on average.
# Simple linear regression of game duration on the Dire score.
model_dire_score <- lm(
  formula = duration ~ dire_score,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_dire_score)
##
## Call:
## lm(formula = duration ~ dire_score, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1201.0 -326.5 -86.7 236.0 4428.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1558.9239 12.8889 121 <2e-16 ***
## dire_score 14.4128 0.4118 35 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 499.4 on 9998 degrees of freedom
## Multiple R-squared: 0.1092, Adjusted R-squared: 0.1091
## F-statistic: 1225 on 1 and 9998 DF, p-value: < 2.2e-16
# Scatterplot of duration against the Dire score.
with(df, plot(
  x = dire_score, y = duration,
  main = "Duration ~ Dire Score",
  xlab = "Dire Score", ylab = "Duration",
  col = "steelblue", pch = 16
))
# Overlay the fitted regression line.
abline(reg = model_dire_score, col = "firebrick", lwd = 2)
# Standard lm diagnostic plots (residuals, Q-Q, scale-location, leverage).
plot(model_dire_score)
There is a positive slope. For each one unit increase in radiant score, the game duration increases by 12.6s on average.
# Simple linear regression of game duration on the Radiant score.
model_radiant_score <- lm(
  formula = duration ~ radiant_score,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_radiant_score)
##
## Call:
## lm(formula = duration ~ radiant_score, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1261.9 -324.3 -97.1 236.1 4315.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1613.0045 13.4045 120.33 <2e-16 ***
## radiant_score 12.6324 0.4331 29.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 507.9 on 9998 degrees of freedom
## Multiple R-squared: 0.07842, Adjusted R-squared: 0.07833
## F-statistic: 850.8 on 1 and 9998 DF, p-value: < 2.2e-16
# Scatterplot of duration against the Radiant score.
with(df, plot(
  x = radiant_score, y = duration,
  main = "Duration ~ Radiant Score",
  xlab = "Radiant Score", ylab = "Duration",
  col = "steelblue", pch = 16
))
# Overlay the fitted regression line.
abline(reg = model_radiant_score, col = "firebrick", lwd = 2)
# Standard lm diagnostic plots (residuals, Q-Q, scale-location, leverage).
plot(model_radiant_score)
The scatterplot is clearly non-linear (it looks polynomial or bell-shaped), so simple linear regression is not a good way to model these data.
# Simple linear regression of game duration on exp_15min.
model_exp_15min <- lm(
  formula = duration ~ exp_15min,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_exp_15min)
##
## Call:
## lm(formula = duration ~ exp_15min, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1141.7 -355.4 -98.6 255.6 4747.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.975e+03 5.290e+00 373.371 <2e-16 ***
## exp_15min -3.250e-03 1.277e-03 -2.545 0.0109 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 528.9 on 9998 degrees of freedom
## Multiple R-squared: 0.0006474, Adjusted R-squared: 0.0005475
## F-statistic: 6.477 on 1 and 9998 DF, p-value: 0.01094
# Scatterplot of duration against exp_15min.
with(df, plot(
  x = exp_15min, y = duration,
  main = "Duration ~ Experience gained at 15 mins",
  xlab = "Experience gained at 15 mins", ylab = "Duration",
  col = "steelblue", pch = 16
))
# Overlay the fitted regression line.
abline(reg = model_exp_15min, col = "firebrick", lwd = 2)
# Standard lm diagnostic plots (residuals, Q-Q, scale-location, leverage).
plot(model_exp_15min)
There is a positive correlation between team fight duration and total duration.
# Simple linear regression of game duration on team fight duration.
model_team_duration <- lm(
  formula = duration ~ teamfight_duration,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_team_duration)
##
## Call:
## lm(formula = duration ~ teamfight_duration, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1122.6 -332.2 -102.5 239.6 4463.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.629e+03 1.246e+01 130.73 <2e-16 ***
## teamfight_duration 1.038e+00 3.423e-02 30.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 506.3 on 9998 degrees of freedom
## Multiple R-squared: 0.08422, Adjusted R-squared: 0.08413
## F-statistic: 919.5 on 1 and 9998 DF, p-value: < 2.2e-16
# Scatterplot of duration against team fight duration.
with(df, plot(
  x = teamfight_duration, y = duration,
  main = "Duration ~ Team fight duration",
  xlab = "Team fight duration", ylab = "Duration",
  col = "steelblue", pch = 16
))
# Overlay the fitted regression line.
abline(reg = model_team_duration, col = "firebrick", lwd = 2)
# Standard lm diagnostic plots (residuals, Q-Q, scale-location, leverage).
plot(model_team_duration)
There is a positive slope. For each additional team fight death, the total duration increases by about 11.2 s on average.
# Simple linear regression of game duration on team fight deaths.
model_team_death <- lm(
  formula = duration ~ Tteamfight_deaths,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_team_death)
##
## Call:
## lm(formula = duration ~ Tteamfight_deaths, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1073.0 -331.2 -105.7 239.6 4412.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1638.6520 12.1672 134.68 <2e-16 ***
## Tteamfight_deaths 11.2367 0.3698 30.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 506.2 on 9998 degrees of freedom
## Multiple R-squared: 0.08454, Adjusted R-squared: 0.08444
## F-statistic: 923.2 on 1 and 9998 DF, p-value: < 2.2e-16
# Scatterplot of duration against team fight deaths.
with(df, plot(
  x = Tteamfight_deaths, y = duration,
  main = "Duration ~ Team fight deaths",
  xlab = "Team fight deaths", ylab = "Duration",
  col = "steelblue", pch = 16
))
# Overlay the fitted regression line.
abline(reg = model_team_death, col = "firebrick", lwd = 2)
# Standard lm diagnostic plots (residuals, Q-Q, scale-location, leverage).
plot(model_team_death)
There is a positive slope. For one additional team fight, the total duration increases by 49.6s.
# Simple linear regression of game duration on team fight frequency.
model_team_frequency <- lm(
  formula = duration ~ teamfight_frequency,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_team_frequency)
##
## Call:
## lm(formula = duration ~ teamfight_frequency, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1098.4 -333.6 -103.1 241.7 4373.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1620.296 12.811 126.48 <2e-16 ***
## teamfight_frequency 49.581 1.646 30.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 506.6 on 9998 degrees of freedom
## Multiple R-squared: 0.08324, Adjusted R-squared: 0.08315
## F-statistic: 907.8 on 1 and 9998 DF, p-value: < 2.2e-16
# Scatterplot of duration against team fight frequency.
with(df, plot(
  x = teamfight_frequency, y = duration,
  main = "Duration ~ Team fight frequency",
  xlab = "Team fight frequency", ylab = "Duration",
  col = "steelblue", pch = 16
))
# Overlay the fitted regression line.
abline(reg = model_team_frequency, col = "firebrick", lwd = 2)
# Standard lm diagnostic plots (residuals, Q-Q, scale-location, leverage).
plot(model_team_frequency)
# Duration ~ Strength heroes picked by Radiant
# Convert the flag to a factor so lm() treats it as categorical.
df[["Strength_picked_r"]] <- factor(df[["Strength_picked_r"]])
model_strr <- lm(
  formula = duration ~ Strength_picked_r,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_strr)
##
## Call:
## lm(formula = duration ~ Strength_picked_r, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1107 -356 -98 254 4761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1966.975 14.935 131.704 <2e-16 ***
## Strength_picked_r1 8.985 15.971 0.563 0.574
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 529.1 on 9998 degrees of freedom
## Multiple R-squared: 3.166e-05, Adjusted R-squared: -6.836e-05
## F-statistic: 0.3165 on 1 and 9998 DF, p-value: 0.5737
# Box plots of duration per group (plot() draws box plots because the
# predictor is a factor).
plot(df$Strength_picked_r, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Strength heroes picked by Radiant",
     ylab = "Duration",
     main = "Duration ~ Strength heroes picked by Radiant")
# Overlay the group means instead of abline(model_strr): boxes are drawn at
# x = 1, 2, ... while the model's dummy coding puts the reference level at
# x = 0, so abline() would draw the fitted line one position off.
strr_means <- tapply(df$duration, df$Strength_picked_r, mean, na.rm = TRUE)
lines(seq_along(strr_means), strr_means,
      type = "b", pch = 18, cex = 1.5, lwd = 2, col = "firebrick")
# Standard lm diagnostic plots for this model.
plot(model_strr)
# Duration ~ Strength heroes picked by Dire
# Convert the flag to a factor so lm() treats it as categorical.
df[["Strength_picked_d"]] <- factor(df[["Strength_picked_d"]])
model_strd <- lm(
  formula = duration ~ Strength_picked_d,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_strd)
##
## Call:
## lm(formula = duration ~ Strength_picked_d, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1113.4 -356.6 -98.0 254.0 4749.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1987.40 15.00 132.490 <2e-16 ***
## Strength_picked_d1 -14.35 16.03 -0.895 0.371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 529.1 on 9998 degrees of freedom
## Multiple R-squared: 8.015e-05, Adjusted R-squared: -1.986e-05
## F-statistic: 0.8014 on 1 and 9998 DF, p-value: 0.3707
# Box plots of duration per group (plot() draws box plots because the
# predictor is a factor).
plot(df$Strength_picked_d, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Strength heroes picked by Dire",
     ylab = "Duration",
     main = "Duration ~ Strength heroes picked by Dire")
# Overlay the group means instead of abline(model_strd): boxes are drawn at
# x = 1, 2, ... while the model's dummy coding puts the reference level at
# x = 0, so abline() would draw the fitted line one position off.
strd_means <- tapply(df$duration, df$Strength_picked_d, mean, na.rm = TRUE)
lines(seq_along(strd_means), strd_means,
      type = "b", pch = 18, cex = 1.5, lwd = 2, col = "firebrick")
# Standard lm diagnostic plots for this model.
plot(model_strd)
Picking an Intelligence hero on Radiant is associated with a shorter total game length (about 47 s shorter on average).
# Duration ~ Intelligence heroes picked by Radiant
# Convert the flag to a factor so lm() treats it as categorical.
df[["Intelligence_picked_r"]] <- factor(df[["Intelligence_picked_r"]])
model_intr <- lm(
  formula = duration ~ Intelligence_picked_r,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_intr)
##
## Call:
## lm(formula = duration ~ Intelligence_picked_r, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1135.0 -354.0 -95.8 256.2 4724.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2013.02 12.11 166.206 < 2e-16 ***
## Intelligence_picked_r1 -47.18 13.46 -3.505 0.000459 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 528.8 on 9998 degrees of freedom
## Multiple R-squared: 0.001227, Adjusted R-squared: 0.001127
## F-statistic: 12.28 on 1 and 9998 DF, p-value: 0.0004591
# Box plots of duration per group (plot() draws box plots because the
# predictor is a factor).
plot(df$Intelligence_picked_r, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Intelligence heroes picked by Radiant",
     ylab = "Duration",
     main = "Duration ~ Intelligence heroes picked by Radiant")
# Overlay the group means instead of abline(model_intr): boxes are drawn at
# x = 1, 2, ... while the model's dummy coding puts the reference level at
# x = 0, so abline() would draw the fitted line one position off.
intr_means <- tapply(df$duration, df$Intelligence_picked_r, mean, na.rm = TRUE)
lines(seq_along(intr_means), intr_means,
      type = "b", pch = 18, cex = 1.5, lwd = 2, col = "firebrick")
# Standard lm diagnostic plots for this model.
plot(model_intr)
Picking an Intelligence hero on Dire is associated with a shorter total game length (about 36 s shorter on average).
# Duration ~ Intelligence heroes picked by Dire
# Convert the flag to a factor so lm() treats it as categorical.
df[["Intelligence_picked_d"]] <- factor(df[["Intelligence_picked_d"]])
model_intd <- lm(
  formula = duration ~ Intelligence_picked_d,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_intd)
##
## Call:
## lm(formula = duration ~ Intelligence_picked_d, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1120.2 -356.5 -96.5 255.5 4768.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2004.15 12.59 159.236 <2e-16 ***
## Intelligence_picked_d1 -35.61 13.87 -2.567 0.0103 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 528.9 on 9998 degrees of freedom
## Multiple R-squared: 0.0006588, Adjusted R-squared: 0.0005588
## F-statistic: 6.591 on 1 and 9998 DF, p-value: 0.01026
# Box plots of duration per group (plot() draws box plots because the
# predictor is a factor).
plot(df$Intelligence_picked_d, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Intelligence heroes picked by Dire",
     ylab = "Duration",
     main = "Duration ~ Intelligence heroes picked by Dire")
# Overlay the group means instead of abline(model_intd): boxes are drawn at
# x = 1, 2, ... while the model's dummy coding puts the reference level at
# x = 0, so abline() would draw the fitted line one position off.
intd_means <- tapply(df$duration, df$Intelligence_picked_d, mean, na.rm = TRUE)
lines(seq_along(intd_means), intd_means,
      type = "b", pch = 18, cex = 1.5, lwd = 2, col = "firebrick")
# Standard lm diagnostic plots for this model.
plot(model_intd)
# Duration ~ Agility heroes picked by Radiant
# Convert the flag to a factor so lm() treats it as categorical.
df[["Agility_picked_r"]] <- factor(df[["Agility_picked_r"]])
model_agir <- lm(
  formula = duration ~ Agility_picked_r,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_agir)
##
## Call:
## lm(formula = duration ~ Agility_picked_r, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1116.4 -355.5 -97.5 255.6 4766.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1995.37 12.62 158.151 <2e-16 ***
## Agility_picked_r1 -24.92 13.90 -1.793 0.073 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 529 on 9998 degrees of freedom
## Multiple R-squared: 0.0003214, Adjusted R-squared: 0.0002214
## F-statistic: 3.214 on 1 and 9998 DF, p-value: 0.07304
# Box plots of duration per group (plot() draws box plots because the
# predictor is a factor).
plot(df$Agility_picked_r, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Agility heroes picked by Radiant",
     ylab = "Duration",
     main = "Duration ~ Agility heroes picked by Radiant")
# Overlay the group means instead of abline(model_agir): boxes are drawn at
# x = 1, 2, ... while the model's dummy coding puts the reference level at
# x = 0, so abline() would draw the fitted line one position off.
agir_means <- tapply(df$duration, df$Agility_picked_r, mean, na.rm = TRUE)
lines(seq_along(agir_means), agir_means,
      type = "b", pch = 18, cex = 1.5, lwd = 2, col = "firebrick")
# Standard lm diagnostic plots for this model.
plot(model_agir)
# Duration ~ Agility heroes picked by Dire
# Convert the flag to a factor so lm() treats it as categorical.
df[["Agility_picked_d"]] <- factor(df[["Agility_picked_d"]])
model_agid <- lm(
  formula = duration ~ Agility_picked_d,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_agid)
##
## Call:
## lm(formula = duration ~ Agility_picked_d, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1103.6 -356.6 -98.6 254.4 4764.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1985.58 12.68 156.594 <2e-16 ***
## Agility_picked_d1 -13.01 13.95 -0.932 0.351
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 529.1 on 9998 degrees of freedom
## Multiple R-squared: 8.696e-05, Adjusted R-squared: -1.305e-05
## F-statistic: 0.8695 on 1 and 9998 DF, p-value: 0.3511
# Box plots of duration per group (plot() draws box plots because the
# predictor is a factor).
plot(df$Agility_picked_d, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Agility heroes picked by Dire",
     ylab = "Duration",
     main = "Duration ~ Agility heroes picked by Dire")
# Overlay the group means instead of abline(model_agid): boxes are drawn at
# x = 1, 2, ... while the model's dummy coding puts the reference level at
# x = 0, so abline() would draw the fitted line one position off.
agid_means <- tapply(df$duration, df$Agility_picked_d, mean, na.rm = TRUE)
lines(seq_along(agid_means), agid_means,
      type = "b", pch = 18, cex = 1.5, lwd = 2, col = "firebrick")
# Standard lm diagnostic plots for this model.
plot(model_agid)
Picking a Universal hero on Radiant is associated with a longer total game length (about 64 s longer on average).
# Duration ~ Universal heroes picked by Radiant
# Convert the flag to a factor so lm() treats it as categorical.
df[["Universal_picked_r"]] <- factor(df[["Universal_picked_r"]])
model_unir <- lm(
  formula = duration ~ Universal_picked_r,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_unir)
##
## Call:
## lm(formula = duration ~ Universal_picked_r, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1117.8 -352.3 -97.4 254.2 4745.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1928.07 10.24 188.356 < 2e-16 ***
## Universal_picked_r1 63.74 11.95 5.333 9.86e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 528.3 on 9998 degrees of freedom
## Multiple R-squared: 0.002837, Adjusted R-squared: 0.002737
## F-statistic: 28.44 on 1 and 9998 DF, p-value: 9.861e-08
# Box plots of duration per group (plot() draws box plots because the
# predictor is a factor).
plot(df$Universal_picked_r, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Universal heroes picked by Radiant",
     ylab = "Duration",
     main = "Duration ~ Universal heroes picked by Radiant")
# Overlay the group means instead of abline(model_unir): boxes are drawn at
# x = 1, 2, ... while the model's dummy coding puts the reference level at
# x = 0, so abline() would draw the fitted line one position off.
unir_means <- tapply(df$duration, df$Universal_picked_r, mean, na.rm = TRUE)
lines(seq_along(unir_means), unir_means,
      type = "b", pch = 18, cex = 1.5, lwd = 2, col = "firebrick")
# Standard lm diagnostic plots for this model.
plot(model_unir)
Picking a Universal hero on Dire is associated with a longer total game length (about 81 s longer on average).
# Duration ~ Universal heroes picked by Dire
# Convert the flag to a factor so lm() treats it as categorical.
df[["Universal_picked_d"]] <- factor(df[["Universal_picked_d"]])
model_unid <- lm(
  formula = duration ~ Universal_picked_d,
  data = df
)
# Coefficient table, residual summary and overall fit statistics.
summary(model_unid)
##
## Call:
## lm(formula = duration ~ Universal_picked_d, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1123.4 -353.4 -96.5 254.5 4739.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1916.512 9.993 191.782 < 2e-16 ***
## Universal_picked_d1 80.888 11.769 6.873 6.66e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 527.8 on 9998 degrees of freedom
## Multiple R-squared: 0.004703, Adjusted R-squared: 0.004603
## F-statistic: 47.24 on 1 and 9998 DF, p-value: 6.66e-12
# Box plots of duration per group (plot() draws box plots because the
# predictor is a factor).
plot(df$Universal_picked_d, df$duration,
     pch = 16, col = "steelblue",
     xlab = "Number of Universal heroes picked by Dire",
     ylab = "Duration",
     main = "Duration ~ Number of Universal heroes picked by Dire")
# Overlay the group means instead of abline(model_unid): boxes are drawn at
# x = 1, 2, ... while the model's dummy coding puts the reference level at
# x = 0, so abline() would draw the fitted line one position off.
unid_means <- tapply(df$duration, df$Universal_picked_d, mean, na.rm = TRUE)
lines(seq_along(unid_means), unid_means,
      type = "b", pch = 18, cex = 1.5, lwd = 2, col = "firebrick")
# Standard lm diagnostic plots for this model.
plot(model_unid)